library(foreign)
set.seed(223456789)
fnum=9

outnames<- c("v2res1.dta", "v2res2.dta", "v2res3.dta", "v2res4.dta", "v2res5.dta", "v2res6.dta", "v2res7.dta", "v2res8.dta", "v2res9.dta", "v2res10.dta") 

print("START")
print(Sys.time())

childdata<- read.dta("dataset_child_for_R_2.dta")
attach(childdata)

boysamp<- data.frame(childdata[sex==1,])
girlsamp<- data.frame(childdata[sex==2,])

names(boysamp)<-c("sex", "educ.attain")
names(girlsamp)<- names(boysamp)
boys_educ_demean<- matrix(data=boysamp$educ.attain - mean(boysamp$educ.attain, nrow=length(boys_educ_demean), ncol=1))
girls_educ_demean<- matrix(data=girlsamp$educ.attain - mean(girlsamp$educ.attain, nrow=length(girls_educ_demean), ncol=1))

adultdata<- read.dta("dataset_adult_for_R_2.dta")
attach(adultdata)

adultsamp<- data.frame(adultdata)

names(adultsamp)<-c("educ.attain.husb", "educ.attain.wife")

husb_educ_demean <- adultsamp$educ.attain.husb - mean(adultsamp$educ.attain.husb)
wife_educ_demean <- adultsamp$educ.attain.wife - mean(adultsamp$educ.attain.wife)

temp<- matrix(data=cbind(husb_educ_demean, wife_educ_demean), nrow=nrow(adultsamp), ncol=2)
adultsamp_demean<- data.frame(temp)
names(adultsamp_demean)<- c("educ.attain.husb.demean", "educ.attain.wife.demean")

N_boy <- nrow(boysamp)
N_girl <- nrow(girlsamp)
N_adult<- nrow(adultsamp)

nreps<- 5
rho<-0
nnames <- 5000

RES <- matrix(data=0, nrow=1, 23) 

rsq<- function(y, i) {
	orig<- data.frame(matrix(c(i,y), nrow=length(y), ncol=2))
	names(orig)<- (c("i", "y"))
	new<- aggregate(y, list(i), FUN="mean")
	names(new)<- c("i.new", "ybar")
	combo<- merge(orig, new, by.x="i", by.y="i.new")
	reg<- lm(combo$y ~ combo$ybar)
	err<- sum(reg$res^2)
	tot<- sum((y-mean(y))^2)
	r<- 1-(err/tot)
	return(r)
	}



name.assign<- function(X, N, delta_mat) {
	
		#inialize name matrix
		name<- matrix(data=0, nrow=N, ncol=1)
	
		x <- matrix(data=c(rep(1, N), X), nrow=N, ncol=2) 
		p<- matrix(data=exp(x%*% delta_mat) / (rowSums(exp(x %*% delta_mat))), nrow=N, ncol=N_names)
		cu.p<- matrix(data=rep(0, length(p)), nrow=N, ncol=N_names)
		for (j in 2:N_names) cu.p[,j]<-cu.p[,j-1]+p[,j] 
		name_temp<- matrix(data=0, nrow=N, ncol=N_names)
		namevec<- matrix(data=runif(N), nrow=N, ncol=1)
		name_temp[,N_names]<- (cu.p[,N_names]<namevec)
		n<- N_names-1
		for (j in 1:n) {
				name_temp[,j]<- (cu.p[,j]<namevec & cu.p[,j+1]>=namevec)
			}
			name_temp_2<- matrix(data=c(1:N_names), nrow=N_names, ncol=1)
			name<- name_temp%*%name_temp_2
		return(name)
	}
	
for (N_names in nnames) {
	
	for (sigma2_con_m in c(5)) {
		
		for(sigma2_ses_m in c(0, 0.005, 0.01, 0.015, 0.02, 0.025, 0.05, 0.1)) {
			
			for (sigma2_con_f in c(5)) {
		
				for(sigma2_ses_f in c(0, 0.005, 0.01, 0.015, 0.02, 0.025, 0.05, 0.1)) {
		
		for (rep in c(1:nreps)) {

delta_con_m <- sqrt(sigma2_con_m)*rnorm(N_names)
delta_ses_m <- (rho*sigma2_ses_m/sigma2_con_m)*delta_con_m + sqrt(sigma2_ses_m/(1-rho^2))*rnorm(N_names)

delta_con_f <- sqrt(sigma2_con_f)*rnorm(N_names)
delta_ses_f <- (rho*sigma2_ses_f/sigma2_con_f)*delta_con_m + sqrt(sigma2_ses_f/(1-rho^2))*rnorm(N_names)

delta_mat_m <- t(cbind(delta_con_m, delta_ses_m))
delta_mat_f <- t(cbind(delta_con_f, delta_ses_f))

names_boy<- name.assign(boys_educ_demean, nrow(boys_educ_demean), delta_mat_m)
names_girl<- name.assign(girls_educ_demean, nrow(girls_educ_demean), delta_mat_f)
names_husb<- name.assign(adultsamp_demean$educ.attain.husb.demean, nrow(adultsamp_demean), delta_mat_m)
names_wife<- name.assign(adultsamp_demean$educ.attain.wife.demean, nrow(adultsamp_demean), delta_mat_f)

boysamp_named<- data.frame(cbind(boysamp, names_boy))
names(boysamp_named)<- c(names(boysamp), "assigned.name.boy")
girlsamp_named<- data.frame(cbind(girlsamp, names_girl))
names(girlsamp_named)<- c(names(girlsamp), "assigned.name.girl")

adultsamp_named<- data.frame(cbind(adultsamp, names_husb, names_wife))
names(adultsamp_named)<- c(names(adultsamp), "assigned.name.husb", "assigned.name.wife")

boy_means<- aggregate(boysamp_named$educ.attain, list(boysamp_named$assigned.name.boy), FUN=mean)
boy_means<- data.frame(boy_means)
names(boy_means) <- c("assigned.name.boy", "educ.pseudo.husb")

girl_means<- aggregate(girlsamp_named$educ.attain, list(girlsamp_named$assigned.name.girl), FUN=mean)
girl_means<- data.frame(girl_means)
names(girl_means) <- c("assigned.name.girl", "educ.pseudo.wife")

temp<- merge(adultsamp_named, girl_means, by.x="assigned.name.wife", by.y="assigned.name.girl")

merged_adult<- merge(temp, boy_means, by.x="assigned.name.husb", by.y="assigned.name.boy")

merged_adult<- data.frame(merged_adult)
names(merged_adult)<- c("assigned.name.husb", "assigned.name.wife", "educ.attain.husb", "educ.attain.wife", "educ.pseudo.wife", "educ.pseudo.husb")	
		
name_counter_husb<- data.frame(cbind(adultsamp_named$assigned.name.husb, rep(1, nrow(adultsamp_named))))
name_counter_wife<- data.frame(cbind(adultsamp_named$assigned.name.wife, rep(1, nrow(adultsamp_named))))

names(name_counter_husb)<- c("assigned.name.husb", "ones")	
names(name_counter_wife)<- c("assigned.name.wife", "ones")	
namecounts_husb<- aggregate(name_counter_husb$ones, list(name_counter_husb$assigned.name.husb), FUN=sum)	
namecounts_wife<- aggregate(name_counter_wife$ones, list(name_counter_wife$assigned.name.wife), FUN=sum)

namecounts_husb<- data.frame(namecounts_husb)
namecounts_wife<- data.frame(namecounts_wife)
names(namecounts_husb)<- c("assigned.name.husb", "count")
names(namecounts_wife)<- c("assigned.name.wife", "count")

sortname_husb<- sort(namecounts_husb$count, decreasing=TRUE)
num<- sum(sortname_husb[1:50])
denom<- sum(sortname_husb)

n_names_linked_husb<- length(sortname_husb)

share50_husb <- num/denom

sortname_wife<- sort(namecounts_wife$count, decreasing=TRUE)
num<- sum(sortname_wife[1:50])
denom<- sum(sortname_wife)

n_names_linked_wife<- length(sortname_wife)

share50_wife <- num/denom
		
### Record: (1) r-squared (2) share top 50	(3) regression of husband's education on wife's eduction quartiles (4) variance of pseudo education	(5) correlation of husband's and wife's pseudo education

merged_adult_clean<- data.frame(na.omit(merged_adult))
names(merged_adult_clean)<- names(merged_adult)

n_adult_linked<- nrow(merged_adult_clean)

q<- quantile(merged_adult_clean$educ.pseudo.wife)
q1<- merged_adult_clean$educ.pseudo.wife < q[2]
n_q1<- sum(q1)
q2<- merged_adult_clean$educ.pseudo.wife < q[3] & merged_adult_clean$educ.pseudo.wife >= q[2]
n_q2<- sum(q2)
q3<- merged_adult_clean$educ.pseudo.wife < q[4] & merged_adult_clean$educ.pseudo.wife >= q[3]
n_q3<- sum(q3)
q4<- merged_adult_clean$educ.pseudo.wife >=q[4]
n_q4<- sum(q4)

merged_adult_quartile<- matrix(data=cbind(merged_adult_clean$educ.pseudo.husb, merged_adult_clean$educ.pseudo.wife, q1, q2, q3, q4), nrow=nrow(merged_adult_clean), ncol=6)
merged_adult_quartile<- data.frame(merged_adult_quartile)
names(merged_adult_quartile)<- c("educ.pseudo.husb", "educ.pseudo.wife", "q1", "q2", "q3", "q4")

corr.pseudo<- cor(merged_adult_clean$educ.pseudo.husb, merged_adult_clean$educ.pseudo.wife)
var.pseudo.m<- var(merged_adult_clean$educ.pseudo.husb)
var.pseudo.f<- var(merged_adult_clean$educ.pseudo.wife)

qtile.reg<- lm(educ.pseudo.husb ~ q2 + q3 + q4, data=merged_adult_quartile)
beta.q2<- qtile.reg$coefficients[2]
beta.q3<- qtile.reg$coefficients[3]
beta.q4<- qtile.reg$coefficients[4]

rsq.boy <- rsq(boysamp_named$educ.attain, boysamp_named$assigned.name.boy)
rsq.girl <- rsq(girlsamp_named$educ.attain, girlsamp_named$assigned.name.girl)

newrow<- matrix(data=c(fnum, rep, sigma2_con_m, sigma2_ses_m, sigma2_con_f, sigma2_ses_f, corr.pseudo, var.pseudo.m, var.pseudo.f, rsq.boy, rsq.girl, share50_husb, share50_wife, n_names_linked_husb, n_names_linked_wife, N_adult, n_adult_linked, beta.q2, beta.q3, beta.q4, n_q2, n_q3, n_q4), nrow=1, ncol=23)

print(newrow)

RES<- rbind(RES, newrow)

print("ENDS at")
print(Sys.time())		

			} #closes reps	
		}	#closes sigma2_ses_f		
	}	#closes sigma2_con_f				
		}	#closes sigma2_ses_m		
	}	#closes sigma2_con_m
}	#closes N_names

RES<- RES[-1,]
RES.data<- data.frame(RES)

names(RES.data)<- c("file_num", "iteration", "sigma2_con_m", "sigma2_ses_m", "sigma2_con_f", "sigma2_ses_f", "corr_pseudo", "var_pseudo_m", "var_pseudo_f", "rsq_boy", "rsq_girl", "share50_husb", "share50_wife", "n_names_linked_husb", "n_names_linked_wife", "N_adult", "n_adult_linked", "beta_q2", "beta_q3", "beta_q4", "n_q2", "n_q3", "n_q4")

write.dta(RES.data, file=outnames[fnum])
